x86: Make assigned devices' interrupts delivery to right vcpu.
authorKeir Fraser <keir.fraser@citrix.com>
Fri, 25 Sep 2009 09:50:18 +0000 (10:50 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Fri, 25 Sep 2009 09:50:18 +0000 (10:50 +0100)
This patch aims to reduce IPIs when delivering VT-d devices'
interrupts to target vcpus.  According to experiments leveraging a 10G
Oplin NIC card, CPU utilization can be reduced by 5%-6% while the NIC's
bandwidth stays unchanged throughout testing.  This patch always
benefits UP guests with MSI-capable devices assigned and SMP guests
whose lapic's destination mode is physical mode.  It can also benefit
SMP guests whose lapic's dest_mode is logical mode but with only one
destination specified.  So it should cover the major cases in real
environments.  Currently, this patch intercepts the programming of MSI
interrupt status, and calculates the destination id for the pirq in
advance when doing the programming.  When vcpu migration occurs or the
guest re-programs MSI status, it checks whether it needs to set the
corresponding pirq's affinity of assigned devices and makes the vcpu's
and the pirq's affinity consistent to reduce the IPIs eventually.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/vmsi.c
xen/arch/x86/hvm/vmx/vmcs.c
xen/arch/x86/irq.c
xen/drivers/passthrough/io.c
xen/include/asm-x86/hvm/hvm.h
xen/include/asm-x86/irq.h
xen/include/xen/hvm/irq.h

index 81d133ff8bed6a9d3df9a605e1f8f8b986706b9d..7748b4579e35ce7e1313d6a9ee5f8a11c970df4d 100644 (file)
@@ -211,6 +211,35 @@ void hvm_migrate_timers(struct vcpu *v)
     pt_migrate(v);
 }
 
+void hvm_migrate_pirqs(struct vcpu *v)
+{
+    int pirq, irq;
+    struct irq_desc *desc;
+    struct domain *d = v->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    
+    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
+       return;
+
+    spin_lock(&d->event_lock);
+    for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
+          pirq < d->nr_pirqs;
+          pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) )
+    {
+        if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) ||
+               (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) )
+            continue;
+        desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
+        if (!desc)
+            continue;
+        irq = desc - irq_desc;
+        ASSERT(MSI_IRQ(irq));
+        desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+        spin_unlock_irq(&desc->lock);
+    }
+    spin_unlock(&d->event_lock);
+}
+
 void hvm_do_resume(struct vcpu *v)
 {
     ioreq_t *p;
index 3662a4c757b28c4d65afbab0e7fbd822bac8a7fa..e312c442d7bc6fb8754c401506856383e7523881 100644 (file)
@@ -124,6 +124,29 @@ int vmsi_deliver(struct domain *d, int pirq)
     return 1;
 }
 
+/* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
+{
+    int dest_vcpu_id = -1, w = 0;
+    struct vcpu *v;
+    
+    if ( d->max_vcpus == 1 )
+        return 0;
+    for_each_vcpu ( d, v )
+    {
+        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) ) 
+        {
+            w++;
+            dest_vcpu_id = v->vcpu_id;
+        }
+    }
+    if ( w > 1 )
+        return -1;
+
+    return dest_vcpu_id;
+}
+
 /* MSI-X mask bit hypervisor interception */
 struct msixtbl_entry
 {
index 7d3ea0b1aa4def919307af9ccb056425721269ed..11dc4685212149f36c0a40a9bfcc48b8ed487d5a 100644 (file)
@@ -940,6 +940,7 @@ void vmx_do_resume(struct vcpu *v)
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
         hvm_migrate_timers(v);
+        hvm_migrate_pirqs(v);
         vmx_set_host_env(v);
         vpid_sync_vcpu_all(v);
     }
index 620d6b3f5ae8bf1954b54cea0903121b22f6196d..b780d03ed8e197605a4689a9ba77a0bf916ac4b7 100644 (file)
@@ -1517,7 +1517,7 @@ static void dump_irqs(unsigned char key)
             /* Only show CPU0 - CPU31's affinity info.*/
             printk("   IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s"
                     " status=%08x mapped, unbound\n",
-                   irq, *(int*)cfg->domain.bits, cfg->vector,
+                   irq, *(int*)desc->affinity.bits, cfg->vector,
                     desc->handler->typename, desc->status);
         else
         {
@@ -1525,7 +1525,7 @@ static void dump_irqs(unsigned char key)
 
             printk("   IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s "
                     "status=%08x in-flight=%d domain-list=",
-                   irq, *(int*)cfg->domain.bits, cfg->vector,
+                   irq, *(int*)desc->affinity.bits, cfg->vector,
                    desc->handler->typename, desc->status, action->in_flight);
 
             for ( i = 0; i < action->nr_guests; i++ )
index 198c08962d8cb9b7d2f5360bb1bbd95d0ec03bdc..052ab8d1354a08dc46a3a068605acd848feb2927 100644 (file)
@@ -139,8 +139,10 @@ int pt_irq_create_bind_vtd(
         bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs);
         memset(hvm_irq_dpci->hvm_timer, 0, 
                 nr_irqs * sizeof(*hvm_irq_dpci->hvm_timer));
-        for ( int i = 0; i < d->nr_pirqs; i++ )
+        for ( int i = 0; i < d->nr_pirqs; i++ ) {
             INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+            hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1;
+        }
         for ( int i = 0; i < NR_HVM_IRQS; i++ )
             INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
 
@@ -154,6 +156,8 @@ int pt_irq_create_bind_vtd(
 
     if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
     {
+        uint8_t dest, dest_mode;
+        int dest_vcpu_id;
 
         if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
         {
@@ -195,6 +199,14 @@ int pt_irq_create_bind_vtd(
             hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
             hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
         }
+        /* Caculate dest_vcpu_id for MSI-type pirq migration */
+        dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
+        dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
+        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
+        hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id;
+        spin_unlock(&d->event_lock);
+        if ( dest_vcpu_id >= 0 )
+            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
     }
     else
     {
@@ -278,8 +290,8 @@ int pt_irq_create_bind_vtd(
         gdprintk(XENLOG_INFO VTDPREFIX,
                  "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
                  machine_gsi, device, intx);
+        spin_unlock(&d->event_lock);
     }
-    spin_unlock(&d->event_lock);
     return 0;
 }
 
index 1a905272b09f69544646d3f8446d8a2a7039115f..76e06d7db62dc434050c0b068a0b4444857fdc48 100644 (file)
@@ -157,6 +157,8 @@ void hvm_init_guest_time(struct domain *d);
 void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
 u64 hvm_get_guest_time(struct vcpu *v);
 
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
+
 #define hvm_paging_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
 #define hvm_wp_enabled(v) \
@@ -230,6 +232,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
+void hvm_migrate_pirqs(struct vcpu *v);
 
 static inline void
 hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
index 15fd2588f8f62554a4eb786402a246659ccbb759..7a1c0e9d9b7e3d73ae3585c2062fb42bee943897 100644 (file)
@@ -112,6 +112,8 @@ void destroy_irq(unsigned int irq);
 struct irq_desc;
 extern void irq_complete_move(struct irq_desc **descp);
 
+extern struct irq_desc *irq_desc;
+
 void lock_vector_lock(void);
 void unlock_vector_lock(void);
 
index af298e5fe3bfd93f5160fdb4481f4ca9ab4cdb5e..a4cd6fc33736543a4510ee8fd824144b45eb5f5f 100644 (file)
@@ -50,6 +50,7 @@ struct dev_intx_gsi_link {
 struct hvm_gmsi_info {
     uint32_t gvec;
     uint32_t gflags;
+    int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
 };
 
 struct hvm_mirq_dpci_mapping {